

# Peptidoform level analysis #


source('D:/Pipeline comparisons/Writing/R Functions/MQ Binomial Scores Function Peptidoform level MQ.R')
source('D:/Pipeline comparisons/Writing/R Functions/MQ FLR function Bin Adjusted MQ.R')
source('D:/Pipeline comparisons/Writing/R Functions/Function frequency of site MaxQuant.R')



# We want to assess if there are any differences between the Max and MM collapsing methods using all rice data sets #

#> [conflicted] Will prefer dplyr::filter over any other package
suppressPackageStartupMessages(library("tidyverse"))

library(dplyr)
library(stringr)
library(useful)
library(MASS)
library(reshape2)
library(epiDisplay)

# First we calculate all binomial adjusted data #
#################################################

# Read the PSM-level site tables for the eight rice data sets. Every accession
# has two files: "MQ_<accession>_A_PSMSITE.csv" (stored in the objects ending
# in "A", used in the pASTY section below) and "MQ_<accession>_PSMSITE.csv"
# (stored under the bare accession, used in the pSTY section below).
rice_psm_dir <- 'D:/Pipeline comparisons/Writing/Data/MQ/Rice/'

# Load one "MQ_<stem>_PSMSITE.csv" table from `rice_psm_dir`.
read_rice_psm <- function(stem) {
  read.csv(file = paste0(rice_psm_dir, "MQ_", stem, "_PSMSITE.csv"))
}

# "_A" variant of each data set.
PXD000923A <- read_rice_psm("PXD000923_A")
PXD002222A <- read_rice_psm("PXD002222_A")
PXD002756A <- read_rice_psm("PXD002756_A")
PXD004705A <- read_rice_psm("PXD004705_A")
PXD004939A <- read_rice_psm("PXD004939_A")
PXD005241A <- read_rice_psm("PXD005241_A")
PXD012764A <- read_rice_psm("PXD012764_A")
PXD019291A <- read_rice_psm("PXD019291_A")

# Plain variant of each data set.
PXD000923 <- read_rice_psm("PXD000923")
PXD002222 <- read_rice_psm("PXD002222")
PXD002756 <- read_rice_psm("PXD002756")
PXD004705 <- read_rice_psm("PXD004705")
PXD004939 <- read_rice_psm("PXD004939")
PXD005241 <- read_rice_psm("PXD005241")
PXD012764 <- read_rice_psm("PXD012764")
PXD019291 <- read_rice_psm("PXD019291")

# Label every row with the accession it came from, so the tables can be pooled
# later and still be tabulated per data set. Both variants of a data set get
# the same label (the "A" suffix is not part of it).
tag_dataset <- function(psm, accession) {
  psm$dataset <- accession
  psm
}

PXD000923A <- tag_dataset(PXD000923A, "PXD000923")
PXD000923 <- tag_dataset(PXD000923, "PXD000923")

PXD002222A <- tag_dataset(PXD002222A, "PXD002222")
PXD002222 <- tag_dataset(PXD002222, "PXD002222")

PXD002756A <- tag_dataset(PXD002756A, "PXD002756")
PXD002756 <- tag_dataset(PXD002756, "PXD002756")

PXD004705A <- tag_dataset(PXD004705A, "PXD004705")
PXD004705 <- tag_dataset(PXD004705, "PXD004705")

PXD004939A <- tag_dataset(PXD004939A, "PXD004939")
PXD004939 <- tag_dataset(PXD004939, "PXD004939")

PXD005241A <- tag_dataset(PXD005241A, "PXD005241")
PXD005241 <- tag_dataset(PXD005241, "PXD005241")

PXD012764A <- tag_dataset(PXD012764A, "PXD012764")
PXD012764 <- tag_dataset(PXD012764, "PXD012764")

PXD019291A <- tag_dataset(PXD019291A, "PXD019291")
PXD019291 <- tag_dataset(PXD019291, "PXD019291")


# Add a `Unique_scan` key to a PSM table: the Spectrum and Retention.time
# columns joined with an underscore.
add_unique_scan <- function(psm) {
  psm$Unique_scan <- paste0(psm$Spectrum, "_", psm$Retention.time)
  psm
}

# "A" variant tables.
PXD000923A <- add_unique_scan(PXD000923A)
PXD002222A <- add_unique_scan(PXD002222A)
PXD002756A <- add_unique_scan(PXD002756A)
PXD004705A <- add_unique_scan(PXD004705A)
PXD004939A <- add_unique_scan(PXD004939A)
PXD005241A <- add_unique_scan(PXD005241A)
PXD012764A <- add_unique_scan(PXD012764A)
PXD019291A <- add_unique_scan(PXD019291A)

# Plain tables.
PXD000923 <- add_unique_scan(PXD000923)
PXD002222 <- add_unique_scan(PXD002222)
PXD002756 <- add_unique_scan(PXD002756)
PXD004705 <- add_unique_scan(PXD004705)
PXD004939 <- add_unique_scan(PXD004939)
PXD005241 <- add_unique_scan(PXD005241)
PXD012764 <- add_unique_scan(PXD012764)
PXD019291 <- add_unique_scan(PXD019291)

# pSTY peptidoform level #
##########################

# Add the two identifier columns used at peptidoform level:
#   Peptidoform = Modified.sequence + "_" + PTM.positions
#   PROTEIN_LOC = Protein + "_" + PROTEIN_POS_NUM
# Only the plain (non-"A") tables receive these columns here.
add_pform_keys <- function(psm) {
  psm$Peptidoform <- paste0(psm$Modified.sequence, "_", psm$PTM.positions)
  psm$PROTEIN_LOC <- paste0(psm$Protein, "_", psm$PROTEIN_POS_NUM)
  psm
}

PXD000923 <- add_pform_keys(PXD000923)
PXD002222 <- add_pform_keys(PXD002222)
PXD002756 <- add_pform_keys(PXD002756)
PXD004705 <- add_pform_keys(PXD004705)
PXD004939 <- add_pform_keys(PXD004939)
PXD005241 <- add_pform_keys(PXD005241)
PXD012764 <- add_pform_keys(PXD012764)
PXD019291 <- add_pform_keys(PXD019291)

# Data collapsed by taking the Max by peptidoform #

# Collapse a PSM table to peptidoform level by keeping, for every Peptidoform,
# the row(s) with the highest PTM_final_prob.
#
# Replaces the superseded `top_n(1, PTM_final_prob)` with `slice_max()` and
# drops the grouping afterwards, so the result no longer carries a hidden
# `group_by(Peptidoform)` into the later subsetting / bind_rows steps.
#
# Behaviour kept from the original `top_n()` call:
#   * rows with a missing PTM_final_prob are never selected (filtered first,
#     because slice_max() would otherwise fall back to them for all-NA groups);
#   * ties are kept, so a peptidoform can contribute more than one row.
# NOTE(review): if exactly one row per peptidoform is wanted, ties need an
# explicit tie-break (with_ties = FALSE) -- confirm the intended behaviour.
collapse_max_by_pform <- function(psm) {
  psm %>%
    dplyr::filter(!is.na(PTM_final_prob)) %>%
    dplyr::group_by(Peptidoform) %>%
    dplyr::slice_max(PTM_final_prob, n = 1, with_ties = TRUE) %>%
    dplyr::ungroup()
}

PXD000923_pform <- collapse_max_by_pform(PXD000923)
PXD002222_pform <- collapse_max_by_pform(PXD002222)
PXD002756_pform <- collapse_max_by_pform(PXD002756)
PXD004705_pform <- collapse_max_by_pform(PXD004705)
PXD004939_pform <- collapse_max_by_pform(PXD004939)
PXD005241_pform <- collapse_max_by_pform(PXD005241)
PXD012764_pform <- collapse_max_by_pform(PXD012764)
PXD019291_pform <- collapse_max_by_pform(PXD019291)


# pASTY peptidoform level #
##########################


# plyr is attached only for the duration of the binAdjustPformMQ() calls and
# detached immediately afterwards.
# NOTE(review): presumably the sourced helper relies on plyr functions, and the
# detach is there so dplyr's verbs are no longer masked -- confirm against the
# sourced file.
library(plyr)

# For every "A" table, run binAdjustPformMQ() and store the result as
# "<name>_pform".
pasty_psm_names <- c(
  "PXD000923A", "PXD002222A", "PXD002756A", "PXD004705A",
  "PXD004939A", "PXD005241A", "PXD012764A", "PXD019291A"
)
for (psm_name in pasty_psm_names) {
  assign(paste0(psm_name, "_pform"), binAdjustPformMQ(get(psm_name)))
}

detach(package:plyr)


# Pass each binomial-adjusted "A" peptidoform table through FLR_AdjMQ() and
# store the result back under the same name. The later steps read the
# FLR_Adj_Score column from these tables.
pasty_pform_names <- c(
  "PXD000923A_pform", "PXD002222A_pform", "PXD002756A_pform",
  "PXD004705A_pform", "PXD004939A_pform", "PXD005241A_pform",
  "PXD012764A_pform", "PXD019291A_pform"
)
for (pform_name in pasty_pform_names) {
  assign(pform_name, FLR_AdjMQ(get(pform_name)))
}


# Save every peptidoform-level table as "MQ_<stem>_pform.csv" in the pform
# output directory (write.csv defaults, so row names are written too).
pform_out_dir <- "D:/Pipeline comparisons/Writing/Data/MQ/Rice/pform/"

write_pform_csv <- function(pform, stem) {
  invisible(write.csv(pform, paste0(pform_out_dir, "MQ_", stem, "_pform.csv")))
}

# Tables collapsed by maximum PTM_final_prob.
write_pform_csv(PXD000923_pform, "PXD000923")
write_pform_csv(PXD002222_pform, "PXD002222")
write_pform_csv(PXD002756_pform, "PXD002756")
write_pform_csv(PXD004705_pform, "PXD004705")
write_pform_csv(PXD004939_pform, "PXD004939")
write_pform_csv(PXD005241_pform, "PXD005241")
write_pform_csv(PXD012764_pform, "PXD012764")
write_pform_csv(PXD019291_pform, "PXD019291")

# Binomial-adjusted "A" tables with FLR columns.
write_pform_csv(PXD000923A_pform, "PXD000923A")
write_pform_csv(PXD002222A_pform, "PXD002222A")
write_pform_csv(PXD002756A_pform, "PXD002756A")
write_pform_csv(PXD004705A_pform, "PXD004705A")
write_pform_csv(PXD004939A_pform, "PXD004939A")
write_pform_csv(PXD005241A_pform, "PXD005241A")
write_pform_csv(PXD012764A_pform, "PXD012764A")
write_pform_csv(PXD019291A_pform, "PXD019291A")

############################################################################################################################################
# COMPARISON
############################################################################################################################################




# Pool the eight "A" peptidoform tables into one table.
AllRice_pASTY_pform <- dplyr::bind_rows(list(
  PXD000923A_pform, PXD002222A_pform, PXD002756A_pform, PXD004705A_pform,
  PXD004939A_pform, PXD005241A_pform, PXD012764A_pform, PXD019291A_pform
))

# Keep the rows whose Amino value is not "A", then tabulate them per data set.
is_not_alanine <- AllRice_pASTY_pform$Amino != "A"
AllRice_pASTY_pform_Excluding_A <- AllRice_pASTY_pform[is_not_alanine, ]

tab1(AllRice_pASTY_pform_Excluding_A$dataset)

# Same pooling and per-data-set tabulation for the max-collapsed tables.
AllRice_pSTY_pform <- dplyr::bind_rows(list(
  PXD000923_pform, PXD002222_pform, PXD002756_pform, PXD004705_pform,
  PXD004939_pform, PXD005241_pform, PXD012764_pform, PXD019291_pform
))

tab1(AllRice_pSTY_pform$dataset)

# Truncate an FLR-annotated peptidoform table at an FLR cut-off.
#
# Keeps rows 1..k, where k is the position of the LAST row whose FLR_Adj_Score
# is <= `flr_cutoff`. Rows before k are kept even if their own score is above
# the cut-off, exactly as the original `1:max(which(...))` indexing did.
#
# Fix: when no row meets the cut-off, `max(which(...))` is -Inf (with a
# warning) and `1:-Inf` aborts the script. `seq_len()` handles that case and
# yields a zero-row table with the same columns, which the later bind_rows()
# calls accept.
#
# pform       table with a numeric FLR_Adj_Score column
# flr_cutoff  largest accepted FLR_Adj_Score (e.g. 0.01)
take_rows_within_flr <- function(pform, flr_cutoff) {
  passing <- which(pform$FLR_Adj_Score <= flr_cutoff)
  last_passing <- if (length(passing) > 0) max(passing) else 0L
  pform[seq_len(last_passing), ]
}

# FLR <= 0.01
PXD000923A_pform_01 <- take_rows_within_flr(PXD000923A_pform, 0.01)
PXD002222A_pform_01 <- take_rows_within_flr(PXD002222A_pform, 0.01)
PXD002756A_pform_01 <- take_rows_within_flr(PXD002756A_pform, 0.01)
PXD004705A_pform_01 <- take_rows_within_flr(PXD004705A_pform, 0.01)
PXD004939A_pform_01 <- take_rows_within_flr(PXD004939A_pform, 0.01)
PXD005241A_pform_01 <- take_rows_within_flr(PXD005241A_pform, 0.01)
PXD012764A_pform_01 <- take_rows_within_flr(PXD012764A_pform, 0.01)
PXD019291A_pform_01 <- take_rows_within_flr(PXD019291A_pform, 0.01)

# FLR <= 0.025
PXD000923A_pform_02.5 <- take_rows_within_flr(PXD000923A_pform, 0.025)
PXD002222A_pform_02.5 <- take_rows_within_flr(PXD002222A_pform, 0.025)
PXD002756A_pform_02.5 <- take_rows_within_flr(PXD002756A_pform, 0.025)
PXD004705A_pform_02.5 <- take_rows_within_flr(PXD004705A_pform, 0.025)
PXD004939A_pform_02.5 <- take_rows_within_flr(PXD004939A_pform, 0.025)
PXD005241A_pform_02.5 <- take_rows_within_flr(PXD005241A_pform, 0.025)
PXD012764A_pform_02.5 <- take_rows_within_flr(PXD012764A_pform, 0.025)
PXD019291A_pform_02.5 <- take_rows_within_flr(PXD019291A_pform, 0.025)

# FLR <= 0.05
PXD000923A_pform_05 <- take_rows_within_flr(PXD000923A_pform, 0.05)
PXD002222A_pform_05 <- take_rows_within_flr(PXD002222A_pform, 0.05)
PXD002756A_pform_05 <- take_rows_within_flr(PXD002756A_pform, 0.05)
PXD004705A_pform_05 <- take_rows_within_flr(PXD004705A_pform, 0.05)
PXD004939A_pform_05 <- take_rows_within_flr(PXD004939A_pform, 0.05)
PXD005241A_pform_05 <- take_rows_within_flr(PXD005241A_pform, 0.05)
PXD012764A_pform_05 <- take_rows_within_flr(PXD012764A_pform, 0.05)
PXD019291A_pform_05 <- take_rows_within_flr(PXD019291A_pform, 0.05)


# Pool the per-data-set FLR-truncated tables, one pooled table per cut-off.
AllRice_pASTY_pform_01 <- dplyr::bind_rows(list(
  PXD000923A_pform_01, PXD002222A_pform_01, PXD002756A_pform_01,
  PXD004705A_pform_01, PXD004939A_pform_01, PXD005241A_pform_01,
  PXD012764A_pform_01, PXD019291A_pform_01
))

AllRice_pASTY_pform_02.5 <- dplyr::bind_rows(list(
  PXD000923A_pform_02.5, PXD002222A_pform_02.5, PXD002756A_pform_02.5,
  PXD004705A_pform_02.5, PXD004939A_pform_02.5, PXD005241A_pform_02.5,
  PXD012764A_pform_02.5, PXD019291A_pform_02.5
))

AllRice_pASTY_pform_05 <- dplyr::bind_rows(list(
  PXD000923A_pform_05, PXD002222A_pform_05, PXD002756A_pform_05,
  PXD004705A_pform_05, PXD004939A_pform_05, PXD005241A_pform_05,
  PXD012764A_pform_05, PXD019291A_pform_05
))

# Keep the rows of a pooled table whose Amino value is not "A".
drop_alanine_rows <- function(pform) {
  pform[pform$Amino != "A", ]
}

# For each cut-off: remove the "A" rows, then tabulate what is left per
# data set.
AllRice_pASTY_pform_01_Excluding_A <- drop_alanine_rows(AllRice_pASTY_pform_01)
tab1(AllRice_pASTY_pform_01_Excluding_A$dataset)

AllRice_pASTY_pform_02.5_Excluding_A <- drop_alanine_rows(AllRice_pASTY_pform_02.5)
tab1(AllRice_pASTY_pform_02.5_Excluding_A$dataset)

AllRice_pASTY_pform_05_Excluding_A <- drop_alanine_rows(AllRice_pASTY_pform_05)
tab1(AllRice_pASTY_pform_05_Excluding_A$dataset)


# Keep the rows of a max-collapsed table whose PTM_final_prob is at least
# `min_prob`.
keep_prob_at_least <- function(pform, min_prob) {
  pform[pform$PTM_final_prob >= min_prob, ]
}

# PTM_final_prob >= 0.95
PXD000923_pform_95 <- keep_prob_at_least(PXD000923_pform, 0.95)
PXD002222_pform_95 <- keep_prob_at_least(PXD002222_pform, 0.95)
PXD002756_pform_95 <- keep_prob_at_least(PXD002756_pform, 0.95)
PXD004705_pform_95 <- keep_prob_at_least(PXD004705_pform, 0.95)
PXD004939_pform_95 <- keep_prob_at_least(PXD004939_pform, 0.95)
PXD005241_pform_95 <- keep_prob_at_least(PXD005241_pform, 0.95)
PXD012764_pform_95 <- keep_prob_at_least(PXD012764_pform, 0.95)
PXD019291_pform_95 <- keep_prob_at_least(PXD019291_pform, 0.95)

# PTM_final_prob >= 0.975
PXD000923_pform_97.5 <- keep_prob_at_least(PXD000923_pform, 0.975)
PXD002222_pform_97.5 <- keep_prob_at_least(PXD002222_pform, 0.975)
PXD002756_pform_97.5 <- keep_prob_at_least(PXD002756_pform, 0.975)
PXD004705_pform_97.5 <- keep_prob_at_least(PXD004705_pform, 0.975)
PXD004939_pform_97.5 <- keep_prob_at_least(PXD004939_pform, 0.975)
PXD005241_pform_97.5 <- keep_prob_at_least(PXD005241_pform, 0.975)
PXD012764_pform_97.5 <- keep_prob_at_least(PXD012764_pform, 0.975)
PXD019291_pform_97.5 <- keep_prob_at_least(PXD019291_pform, 0.975)

# PTM_final_prob >= 0.99
PXD000923_pform_99 <- keep_prob_at_least(PXD000923_pform, 0.99)
PXD002222_pform_99 <- keep_prob_at_least(PXD002222_pform, 0.99)
PXD002756_pform_99 <- keep_prob_at_least(PXD002756_pform, 0.99)
PXD004705_pform_99 <- keep_prob_at_least(PXD004705_pform, 0.99)
PXD004939_pform_99 <- keep_prob_at_least(PXD004939_pform, 0.99)
PXD005241_pform_99 <- keep_prob_at_least(PXD005241_pform, 0.99)
PXD012764_pform_99 <- keep_prob_at_least(PXD012764_pform, 0.99)
PXD019291_pform_99 <- keep_prob_at_least(PXD019291_pform, 0.99)


# Pool the probability-filtered tables across data sets, one pooled table per
# threshold.
AllRice_pSTY_pform_95 <- dplyr::bind_rows(list(
  PXD000923_pform_95, PXD002222_pform_95, PXD002756_pform_95,
  PXD004705_pform_95, PXD004939_pform_95, PXD005241_pform_95,
  PXD012764_pform_95, PXD019291_pform_95
))

AllRice_pSTY_pform_97.5 <- dplyr::bind_rows(list(
  PXD000923_pform_97.5, PXD002222_pform_97.5, PXD002756_pform_97.5,
  PXD004705_pform_97.5, PXD004939_pform_97.5, PXD005241_pform_97.5,
  PXD012764_pform_97.5, PXD019291_pform_97.5
))

AllRice_pSTY_pform_99 <- dplyr::bind_rows(list(
  PXD000923_pform_99, PXD002222_pform_99, PXD002756_pform_99,
  PXD004705_pform_99, PXD004939_pform_99, PXD005241_pform_99,
  PXD012764_pform_99, PXD019291_pform_99
))

# Per-data-set frequency tables at each threshold.
tab1(AllRice_pSTY_pform_95$dataset)

tab1(AllRice_pSTY_pform_97.5$dataset)

tab1(AllRice_pSTY_pform_99$dataset)

